enrichMiR

#' getTS
#'
#' @param species character object. Can be "human", "mouse" or "rat"
#'
#' @return TargetScan miRNA target dataframe with family information in metadata()
#'
getTS <- function(species=c("human","mouse","rat")){
  library(S4Vectors)
  species <- match.arg(species)
  
  # assign species ID
  spec <- switch( species,
                  human = 9606,
                  mouse = 10090,
                  rat = 10116 )
  
  
  # download TargetScan miRNA targeting dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/Summary_Counts.all_predictions.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("Summary_Counts.all_predictions.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  sub$score <- as.numeric(as.character(sub$'Cumulative weighted context++ score'))
    
  # generate TargetScan dataframe
  ts <- DataFrame(family = sub$'miRNA family',
                   rep.miRNA = sub$'Representative miRNA',
                   feature = sub$'Gene Symbol',
                   sites = sub$'Total num conserved sites',
                   score = as.numeric(as.character(sub$'Cumulative weighted context++ score'))
                   )
  ts <- DataFrame(
    aggregate(sub[,c("sites","score")], by=ts[,c("family","feature")], FUN=mean)
    )
  

  
  # download TargetScan miRNA families dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/miR_Family_Info.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("miR_Family_Info.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  
  fam <- sub$`Seed+m8`
  names(fam) <- sub$`MiRBase ID`
  
  # add family info to ts dataframe as attribute
  metadata(ts)$families <- fam
  
  # enrichMiR cant handle 0 values for sites feature
  return(ts[ts$sites!=0,])
}
tests <- c("areamir","overlap","michael","KS","KS2","MW")
# all tests except WO:
tests <- c("overlap","michael","mw","ks","ks2","areamir","areamir2")
# all tests
tests <- c("overlap","michael","wo","mw","ks","ks2","gsea","gseamod","modscore","modsites","areamir","areamir2")
tests <- NULL
cores <- 8

# run enrichMiR on all objects of dea.list (high runtime!)

## foreach: doesnt work
library(foreach)
library(doParallel)
# cl <- makeCluster(cores)
# registerDoParallel(cl, cores=cores)
# chunk.size <- length(dea.names)/cores
# test <- foreach(c=1:cores, .combine="rbind") %dopar% {
#   for(i in dea.names[((c-1)*chunk.size+1):(c*chunk.size)]){
#     lapply(names(props.all), FUN=function(j){
#       enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
#                 cleanNames=TRUE, tests=tests)
#       })
#     }
# }
#stopImplicitCluster()
# stopCluster(cl)

## foreach: Pierre's code (works, but 1 core)
res <- foreach( dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE) ) %dopar% {
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                }

## mclapply (works, but 1 core; all cores when reduced tests var)
## looks like GSEA, WO, mods are problematic
library(parallel)
# multicores on Linux
e.list <- mclapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  }, mc.cores = cores )
## mcmapply: inspired by Pierre's code
res <- mcmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                  }, mc.cores = cores )


## bplapply: doesnt work at all
e.list <- bplapply(dea.names[1:4], FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr,
                cleanNames=TRUE, tests=tests)
      })
  }, BPPARAM = MulticoreParam(cores, progressbar = TRUE) )


## bpmapply: Pierre's code
res <- bpmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                BPPARAM=MulticoreParam(4, threshold="TRACE"), FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, cleanNames=TRUE, tests=tests )
                  } )


## serial run
e.list <- lapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  })


# naming
names(e.list) <- dea.names
for(i in dea.names){
  names(e.list[[i]]) <- names(props.all)
}

# use this between parallel runs
gc(verbose = T)

Benchmarking

Plots

## Score analysis:  detPPV

## Score analysis:  FP.atFDR05

## Score analysis:  log10QDiff

## Score analysis:  TP.atFDR05

scores over the datasets

## $`let-7a`
## original       20       30       40       50 
##    0.806    0.806    0.806    0.806    0.779 
## 
## $`miR-1`
## original       20       30       40       50 
##    0.834    0.834    0.834    0.834    0.834 
## 
## $`miR-124`
## original       20       30       40       50 
##    0.834    0.834    0.816    0.612    0.410 
## 
## $`miR-137`
## original       20       30       40       50 
##    0.806    0.806    0.806    0.797    0.603 
## 
## $`miR-139`
## original       20       30       40       50 
##    0.504    0.430    0.301    0.270    0.146 
## 
## $`miR-143`
## original       20       30       40       50 
##    0.851    0.851    0.851    0.843    0.809 
## 
## $`miR-144`
## original       20       30       40       50 
##    0.851    0.516    0.421    0.299    0.247 
## 
## $`miR-153`
## original       20       30       40       50 
##    0.806    0.788    0.621    0.505    0.391 
## 
## $`miR-155`
## original       20       30       40       50 
##    0.817    0.812    0.813    0.811    0.823 
## 
## $`miR-182`
## original       20       30       40       50 
##    0.806    0.447    0.422    0.409    0.316 
## 
## $`miR-199a`
## original       20       30       40       50 
##    0.851    0.817    0.813    0.759    0.651 
## 
## $`miR-204`
## original       20       30       40       50 
##    0.817    0.798    0.802    0.791    0.773 
## 
## $`miR-205`
## original       20       30       40       50 
##    0.851    0.806    0.765    0.711    0.571 
## 
## $`miR-216b`
## original       20       30       40       50 
##    0.756    0.534    0.326    0.143    0.126 
## 
## $`miR-223`
## original       20       30       40       50 
##    0.851    0.851    0.851    0.851    0.772 
## 
## $`miR-7`
## original       20       30       40       50 
##    0.851    0.838    0.842    0.840    0.816

scores over methods

## $aREAmir
## original       20       30       40       50 
##    1.000    0.927    0.893    0.842    0.661 
## 
## $aREAmir2
## original       20       30       40       50 
##    0.969    0.857    0.788    0.728    0.570 
## 
## $EN.up
## original       20       30       40       50 
##    0.005    0.005    0.005    0.005    0.005 
## 
## $EN.down
## original       20       30       40       50 
##    1.000    0.893    0.833    0.764    0.653 
## 
## $EN.combined
## original       20       30       40       50 
##    0.950    0.847    0.801    0.715    0.568 
## 
## $wEN.up
## original       20       30       40       50 
##    0.005    0.005    0.005    0.005    0.005 
## 
## $wEN.down
## original       20       30       40       50 
##    1.000    0.920    0.900    0.794    0.767 
## 
## $wEN.combined
## original       20       30       40       50 
##    1.000    0.924    0.882    0.791    0.755 
## 
## $michael.up
## original       20       30       40       50 
##    0.005    0.005    0.005    0.005    0.005 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    0.903    0.837    0.752    0.707 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    0.913    0.857    0.834    0.750 
## 
## $MW
## original       20       30       40       50 
##    0.939    0.834    0.745    0.694    0.544 
## 
## $KS
## original       20       30       40       50 
##    0.847    0.747    0.679    0.656    0.468 
## 
## $KS2
## original       20       30       40       50 
##    0.958    0.910    0.864    0.758    0.715 
## 
## $GSEA
## original       20       30       40       50 
##    0.840    0.756    0.748    0.706    0.713 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.672    0.556    0.512    0.505    0.495 
## 
## $modSites
## original       20       30       40       50 
##    0.938    0.872    0.817    0.787    0.697 
## 
## $modScore
## original       20       30       40       50 
##    0.943    0.898    0.862    0.831    0.771 
## 
## $combTest.1
## original       20       30       40       50 
##    1.000    0.934    0.892    0.827    0.753 
## 
## $combTest.2
## original       20       30       40       50 
##    1.000    0.934    0.872    0.806    0.742

TP ratio at FDR 0.05 over methods

## $aREAmir
## original       20       30       40       50 
##    1.000    0.875    0.833    0.750    0.583 
## 
## $aREAmir2
## original       20       30       40       50 
##    0.875    0.771    0.729    0.625    0.438 
## 
## $EN.up
## original       20       30       40       50 
##    0.125    0.021    0.104    0.000    0.021 
## 
## $EN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $EN.combined
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $wEN.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $wEN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $wEN.combined
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $michael.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    1.000    0.979    0.917    0.917 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    1.000    1.000    0.979    0.917 
## 
## $MW
## original       20       30       40       50 
##    1.000    1.000    0.958    0.979    0.938 
## 
## $KS
## original       20       30       40       50 
##    1.000    1.000    0.979    0.979    0.958 
## 
## $KS2
## original       20       30       40       50 
##    0.938    0.917    0.812    0.688    0.729 
## 
## $GSEA
## original       20       30       40       50 
##    0.188    0.125    0.125    0.125    0.188 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.438    0.438    0.438    0.458    0.500 
## 
## $modSites
## original       20       30       40       50 
##    0.938    0.917    0.854    0.917    0.833 
## 
## $modScore
## original       20       30       40       50 
##    1.000    0.979    0.958    0.958    0.917 
## 
## $combTest.1
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $combTest.2
## original       20       30       40       50 
##    1.000    1.000    1.000    0.958    0.979